bitkeeper revision 1.81 (3e561fbbcTcsPP9qTjC_BVR3zYpKjw)
author kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>
Fri, 21 Feb 2003 12:46:51 +0000 (12:46 +0000)
committer kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>
Fri, 21 Feb 2003 12:46:51 +0000 (12:46 +0000)
Many files:
  Block-device layer all fixed up. Serialisation removed. Should fly! :-)

xen-2.4.16/drivers/block/ll_rw_blk.c
xen-2.4.16/drivers/block/xen_block.c
xen-2.4.16/include/hypervisor-ifs/block.h
xen-2.4.16/include/xeno/blkdev.h
xen-2.4.16/include/xeno/sched.h
xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c

index e644974eab3241f2ca1c9c75a8157e038e55d445..615b332c4b16005930f76c5ca6e4cc0e6a0ebffa 100644 (file)
@@ -5,7 +5,6 @@
  * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
  * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
  * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
- * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
  */
 
 /*
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
 
-/* XXX SMH: temporarily we just dive at xen_block completion handler */
-extern void end_block_io_op(struct buffer_head *bh); 
-
-static void end_buffer_dummy(struct buffer_head *bh, int uptodate)
-{
-  /* do nothing */
-}
-
 /* This will die as all synchronous stuff is coming to an end */
 #define complete(_r) panic("completion.h stuff may be needed...")
 
@@ -1036,8 +1027,6 @@ out:
        return 0;
 end_io:
        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
-       /* XXX SMH: do we need this every time? */
-       end_block_io_op(bh);
        return 0;
 }
 
@@ -1107,8 +1096,6 @@ void generic_make_request (int rw, struct buffer_head * bh)
 
                        /* Yecch again */
                        bh->b_end_io(bh, 0);
-                       /* XXX SMH */ 
-                       end_block_io_op(bh);
                        return;
                }
        }
@@ -1238,7 +1225,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
 
                /* We have the buffer lock */
                atomic_inc(&bh->b_count);
-               bh->b_end_io = end_buffer_dummy;
 
                switch(rw) {
                case WRITE:
@@ -1258,8 +1244,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
                        BUG();
        end_io:
                        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
-                       /* XXX SMH */
-                       end_block_io_op(bh);
                        continue;
                }
 
@@ -1313,7 +1297,6 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
                req->bh = bh->b_reqnext;
                bh->b_reqnext = NULL;
                bh->b_end_io(bh, uptodate);
-               end_block_io_op(bh);
                if ((bh = req->bh) != NULL) {
                        req->hard_sector += nsect;
                        req->hard_nr_sectors -= nsect;
index 177dc23bb00f32ced736425abdbc3cbdf17a43ac..bf70737dabfbbf54e4f9d3b6809fd29205e46f46 100644 (file)
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
 
-typedef struct blk_request
-{
-    struct buffer_head *bh;
-    void               *id;
-    struct task_struct *domain;
-} blk_request_t;
-
+/*
+ * These are rather arbitrary. They are fairly large because adjacent
+ * requests pulled from a communication ring are quite likely to end
+ * up being part of the same scatter/gather request at the disc.
+ * It might be a good idea to add scatter/gather support explicitly to
+ * the scatter/gather ring (eg. each request has an array of N pointers);
+ * then these values would better reflect real costs at the disc.
+ */
 #define MAX_PENDING_REQS 32
 #define BATCH_PER_DOMAIN 8
 
-static kmem_cache_t *blk_request_cachep;
+static kmem_cache_t *buffer_head_cachep;
 static atomic_t nr_pending;
 
 static void io_schedule(unsigned long unused);
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
 static void dispatch_rw_block_io(struct task_struct *p, int index);
 static void dispatch_probe_block_io(struct task_struct *p, int index);
 static void dispatch_debug_block_io(struct task_struct *p, int index);
+static void make_response(struct task_struct *p, void *id, unsigned long st);
 
 
 /******************************************************************
@@ -104,6 +106,7 @@ static void io_schedule(unsigned long unused)
     struct task_struct *p;
     struct list_head *ent;
 
+    /* Queue up a batch of requests. */
     while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
             !list_empty(&io_schedule_list) )
     {
@@ -113,10 +116,20 @@ static void io_schedule(unsigned long unused)
         if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
             add_to_blkdev_list_tail(p);
     }
+
+    /* Push the batch through to disc. */
+    run_task_queue(&tq_disk);
 }
 
 static void maybe_trigger_io_schedule(void)
 {
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
     if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
          !list_empty(&io_schedule_list) )
     {
@@ -127,51 +140,17 @@ static void maybe_trigger_io_schedule(void)
 
 
 /******************************************************************
- * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
  */
 
-void end_block_io_op(struct buffer_head * bh)
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
 {
-    unsigned long cpu_mask;
-    blk_request_t *blk_request = NULL;
-    unsigned long flags;
-    struct task_struct *p;
-    int position = 0;
-    blk_ring_t *blk_ring;
-
-    DPRINTK("XEN end_block_io_op, bh: %p\n", bh);
-    
-    if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL ) 
-        goto bad_interrupt;
-
     atomic_dec(&nr_pending);
-    
-    p = blk_request->domain;
+    make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
 
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&p->blk_ring_lock, flags);
-    blk_ring = p->blk_ring_base;
-    position = blk_ring->resp_prod;
-    blk_ring->resp_ring[position].id     = blk_request->id;
-    blk_ring->resp_ring[position].status = 0;
-    blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
-    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
-    
-    /* Kick the relevant domain. */
-    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
-    guest_event_notify(cpu_mask); 
-
-    /* Free state associated with this request. */
-    if ( blk_request->bh ) 
-        kfree(blk_request->bh);     
-    kmem_cache_free(blk_request_cachep, blk_request);
+    kmem_cache_free(buffer_head_cachep, bh);
 
     maybe_trigger_io_schedule();
-
-    return;
-
- bad_interrupt:
-    panic("Block IO interrupt received for unknown buffer [%p]\n", bh);
 }
 
 
@@ -193,55 +172,43 @@ long do_block_io_op(void)
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
 
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
+static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
 {
-    blk_ring_t *blk_ring = task->blk_ring_base;
-    int loop, more_to_do = 0;
-    int resp_ring_ents = 
-        (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1);
-
-    DPRINTK("XEN do_block_io_op %d %d\n",
-            blk_ring->req_cons, blk_ring->req_prod);
+    blk_ring_t *blk_ring = p->blk_ring_base;
+    int i, more_to_do = 0;
 
-    for ( loop = blk_ring->req_cons; 
-         loop != blk_ring->req_prod; 
-         loop = BLK_REQ_RING_INC(loop) ) 
+    for ( i = p->blk_req_cons; 
+         i != blk_ring->req_prod; 
+         i = BLK_RING_INC(i) ) 
     {
-        /*
-         * Bail if we've reached the batch allowance for thsi interface,
-         * or if we risk producing enough responses to overflow the
-         * communication ring.
-         */
-        if ( (max_to_do-- == 0) ||
-             ((atomic_read(&nr_pending) + resp_ring_ents) >
-              BLK_RESP_RING_MAX_ENTRIES) )
+        if ( max_to_do-- == 0 )
         {
             more_to_do = 1;
             break;
         }
         
-       switch ( blk_ring->req_ring[loop].operation )
+       switch ( blk_ring->ring[i].req.operation )
         {
        case XEN_BLOCK_READ:
        case XEN_BLOCK_WRITE:
-           dispatch_rw_block_io(task, loop);
+           dispatch_rw_block_io(p, i);
            break;
 
        case XEN_BLOCK_PROBE:
-           dispatch_probe_block_io(task, loop);
+           dispatch_probe_block_io(p, i);
            break;
 
        case XEN_BLOCK_DEBUG:
-           dispatch_debug_block_io(task, loop);
+           dispatch_debug_block_io(p, i);
            break;
 
        default:
            panic("error: unknown block io operation [%d]\n",
-                  blk_ring->req_ring[loop].operation);
+                  blk_ring->ring[i].req.operation);
        }
     }
 
-    blk_ring->req_cons = loop;
+    p->blk_req_cons = i;
     return more_to_do;
 }
 
@@ -255,20 +222,11 @@ static void dispatch_probe_block_io(struct task_struct *p, int index)
     extern void ide_probe_devices(xen_disk_info_t *xdi);
     blk_ring_t *blk_ring = p->blk_ring_base;
     xen_disk_info_t *xdi;
-    unsigned long flags, cpu_mask;
-    
-    xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer);
-    
-    ide_probe_devices(xdi);
 
-    spin_lock_irqsave(&p->blk_ring_lock, flags);
-    blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
-    blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
-    blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
-    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+    xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);    
+    ide_probe_devices(xdi);
 
-    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
-    guest_event_notify(cpu_mask); 
+    make_response(p, blk_ring->ring[index].req.id, 0);
 }
 
 static void dispatch_rw_block_io(struct task_struct *p, int index)
@@ -276,49 +234,45 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
     blk_ring_t *blk_ring = p->blk_ring_base;
     struct buffer_head *bh;
-    struct request_queue *rq;
     int operation;
-    blk_request_t *blk_request;
     
     /*
      * check to make sure that the block request seems at least
      * a bit legitimate
      */
-    if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
+    if ( (blk_ring->ring[index].req.block_size & (0x200 - 1)) != 0 )
        panic("error: dodgy block size: %d\n", 
-              blk_ring->req_ring[index].block_size);
+              blk_ring->ring[index].req.block_size);
     
-    if ( blk_ring->req_ring[index].buffer == NULL )
+    if ( blk_ring->ring[index].req.buffer == NULL )
        panic("xen_block: bogus buffer from guestOS\n"); 
 
-    DPRINTK("req_cons: %d  req_prod %d  index: %d, op: %s, pri: %s\n",
-            blk_ring->req_cons, blk_ring->req_prod, index, 
-            (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ? 
-             "read" : "write"), 
-            (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? 
-             "sync" : "async"));
+    DPRINTK("req_cons: %d  req_prod %d  index: %d, op: %s\n",
+            p->blk_req_cons, blk_ring->req_prod, index, 
+            (blk_ring->ring[index].req.operation == XEN_BLOCK_READ ? 
+             "read" : "write"));
 
     atomic_inc(&nr_pending);
-    blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
-
-    /* we'll be doing this frequently, would a cache be appropriate? */
-    bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head), 
-                                       GFP_KERNEL);
+    bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
     if ( bh == NULL ) panic("bh is null\n");
 
     /* set just the important bits of the buffer header */
     memset (bh, 0, sizeof (struct buffer_head));
     
-    bh->b_blocknr       = blk_ring->req_ring[index].block_number;
-    bh->b_size          = blk_ring->req_ring[index].block_size; 
-    bh->b_dev           = blk_ring->req_ring[index].device; 
-    bh->b_rsector       = blk_ring->req_ring[index].sector_number;
+    bh->b_blocknr       = blk_ring->ring[index].req.block_number;
+    bh->b_size          = blk_ring->ring[index].req.block_size; 
+    bh->b_dev           = blk_ring->ring[index].req.device; 
+    bh->b_rsector       = blk_ring->ring[index].req.sector_number;
     bh->b_data          = phys_to_virt((unsigned long)
-                                      blk_ring->req_ring[index].buffer);
+                                      blk_ring->ring[index].req.buffer);
     bh->b_count.counter = 1;
-    bh->b_xen_request   = (void *)blk_request;  
-    
-    if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE )
+    bh->b_end_io        = end_block_io_op;
+
+    /* Save meta data about request. */
+    bh->b_xen_domain    = p;
+    bh->b_xen_id        = blk_ring->ring[index].req.id;
+
+    if ( blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE )
     {
        bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
                       (1 << BH_Dirty) | (1 << BH_Uptodate));
@@ -330,15 +284,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
        operation = READ;
     }
 
-    /* save meta data about request */
-    blk_request->id     = blk_ring->req_ring[index].id;
-    blk_request->bh     = bh;
-    blk_request->domain = p; 
-    
-    /* dispatch single block request */
-    ll_rw_block(operation, 1, &bh);       /* linux top half */
-    rq = blk_get_queue(bh->b_rdev);                         
-    generic_unplug_device(rq);            /* linux bottom half */
+    /* Dispatch a single request. We'll flush it to disc later. */
+    ll_rw_block(operation, 1, &bh);
 }
 
 
@@ -347,6 +294,26 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
  * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
  */
 
+static void make_response(struct task_struct *p, void *id, unsigned long st)
+{
+    unsigned long cpu_mask, flags;
+    int position;
+    blk_ring_t *blk_ring;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock_irqsave(&p->blk_ring_lock, flags);
+    blk_ring = p->blk_ring_base;
+    position = blk_ring->resp_prod;
+    blk_ring->ring[position].resp.id     = id;
+    blk_ring->ring[position].resp.status = st;
+    blk_ring->resp_prod = BLK_RING_INC(position);
+    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+    
+    /* Kick the relevant domain. */
+    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
+    guest_event_notify(cpu_mask); 
+}
+
 static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) 
 {
     printk("Dumping block queue stats: nr_pending = %d\n",
@@ -378,12 +345,9 @@ void initialize_block_io ()
     spin_lock_init(&io_schedule_list_lock);
     INIT_LIST_HEAD(&io_schedule_list);
 
-    blk_request_cachep = kmem_cache_create(
-        "blk_request_cache", sizeof(blk_request_t),
+    buffer_head_cachep = kmem_cache_create(
+        "buffer_head_cache", sizeof(struct buffer_head),
         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
     
     add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");     
 }
-
-
-
index 1bf198e1963c4a0ca93e46297142cf205e4de858..627055bf0bf1e0359fa086be1a64474f9698a67c 100644 (file)
 #define XEN_BLOCK_PROBE 8      /* determine io configuration from hypervisor */
 #define XEN_BLOCK_DEBUG 16                                          /* debug */
 
-#define XEN_BLOCK_SYNC  2
-#define XEN_BLOCK_ASYNC 3
-
-#define XEN_BLOCK_MAX_DOMAINS 32  /* NOTE: FIX THIS. VALUE SHOULD COME FROM? */
-
-#define BLK_REQ_RING_SIZE  64
-#define BLK_RESP_RING_SIZE 64
-
-#define BLK_REQ_RING_MAX_ENTRIES  (BLK_REQ_RING_SIZE - 2)
-#define BLK_RESP_RING_MAX_ENTRIES (BLK_RESP_RING_SIZE - 2)
-
-#define BLK_REQ_RING_INC(_i)     (((_i)+1) & (BLK_REQ_RING_SIZE-1))
-#define BLK_RESP_RING_INC(_i)    (((_i)+1) & (BLK_RESP_RING_SIZE-1))
-#define BLK_REQ_RING_ADD(_i,_j)  (((_i)+(_j)) & (BLK_REQ_RING_SIZE-1))
-#define BLK_RESP_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RESP_RING_SIZE-1))
+#define BLK_RING_SIZE        128
+#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2)
+#define BLK_RING_INC(_i)     (((_i)+1) & (BLK_RING_SIZE-1))
+#define BLK_RING_ADD(_i,_j)  (((_i)+(_j)) & (BLK_RING_SIZE-1))
 
 typedef struct blk_ring_req_entry 
 {
     void *          id;                /* for guest os use */
-    int             priority;          /* SYNC or ASYNC for now */
     int             operation;         /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
     char *          buffer;
     unsigned long   block_number;      /* block number */
@@ -57,10 +45,12 @@ typedef struct blk_ring_resp_entry
 
 typedef struct blk_ring_st 
 {
-  unsigned int      req_prod, req_cons;
-  unsigned int      resp_prod, resp_cons;
-  blk_ring_req_entry_t  req_ring[BLK_REQ_RING_SIZE];
-  blk_ring_resp_entry_t resp_ring[BLK_RESP_RING_SIZE];
+    unsigned int req_prod;  /* Request producer. Updated by guest OS. */
+    unsigned int resp_prod; /* Response producer. Updated by Xen.     */
+    union {
+        blk_ring_req_entry_t  req;
+        blk_ring_resp_entry_t resp;
+    } ring[BLK_RING_SIZE];
 } blk_ring_t;
 
 #define MAX_XEN_DISK_COUNT 100
index 03ea926af2d903b74520dd62850c51ce8e7f865f..3fbc78343a0fa7b23a5d27ca4f9d390942068bb4 100644 (file)
@@ -62,8 +62,8 @@ enum bh_state_bits {
                          * for private allocation by other entities
                          */
 };
+
 struct buffer_head {
-        struct buffer_head *b_next;     /* Hash queue list */
         unsigned long b_blocknr;        /* block number */
         unsigned short b_size;          /* block size */
         unsigned short b_list;          /* List that this buffer appears */
@@ -72,24 +72,18 @@ struct buffer_head {
         atomic_t b_count;               /* users using this block */
         kdev_t b_rdev;                  /* Real device */
         unsigned long b_state;          /* buffer state bitmap (see above) */
-        unsigned long b_flushtime;      /* Time when (dirty) buffer should be written */
 
-        struct buffer_head *b_next_free;/* lru/free list linkage */
-        struct buffer_head *b_prev_free;/* doubly linked list of buffers */
-        struct buffer_head *b_this_page;/* circular list of buffers in one page */
         struct buffer_head *b_reqnext;  /* request queue */
 
-        struct buffer_head **b_pprev;   /* doubly linked list of hash-queue */
         char * b_data;                  /* pointer to data block */
         struct pfn_info *b_page;            /* the page this bh is mapped to */
-        void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
-        void *b_private;                /* reserved for b_end_io */
+        void (*b_end_io)(struct buffer_head *bh, int uptodate);
 
         unsigned long b_rsector;        /* Real buffer location on disk */
 
-        struct inode *       b_inode;
-        struct list_head     b_inode_buffers;   /* doubly linked list of inode dirty buffers */
-        void *b_xen_request;                        /* xen request structure */
+        /* Both used by b_end_io function in xen_block.c */
+        void *b_xen_domain;
+        void *b_xen_id;
 };
 
 typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
@@ -127,15 +121,9 @@ static inline void mark_buffer_clean(struct buffer_head * bh)
 
 static inline void buffer_IO_error(struct buffer_head * bh)
 {
-    extern void end_block_io_op(struct buffer_head *bh);
-
     mark_buffer_clean(bh);
-    /*
-     * b_end_io has to clear the BH_Uptodate bitflag in the error case!
-     */
+    /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */
     bh->b_end_io(bh, 0);
-    /* XXX KAF */
-    end_block_io_op(bh);
 }
 
 /**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
index b1cd749e99fd2e119aec9c825311f788da11aa59..3cffa46bf18cc0f931b11efded4ab8b8e598fed8 100644 (file)
@@ -76,6 +76,7 @@ struct task_struct {
 
     /* Block I/O */
     blk_ring_t *blk_ring_base;
+    unsigned int blk_req_cons; /* request consumer */
     struct list_head blkdev_list;
     spinlock_t blk_ring_lock;
 
index 1083836248d1edbd91dae8ec9d03452b0397f6fc..0b77e5536e6570dc1e7c31e3685812488804b526 100644 (file)
@@ -1,3 +1,10 @@
+/******************************************************************************
+ * xl_block.c
+ * 
+ * Xenolinux virtual block-device driver.
+ * 
+ */
+
 #include <linux/config.h>
 #include <linux/module.h>
 
@@ -49,6 +56,7 @@ static int xlblk_max_sectors[XLBLK_MAX];
 #endif
 
 static blk_ring_t *blk_ring;
+static unsigned int resp_cons; /* Response consumer for comms ring. */
 static xen_disk_info_t xen_disk_info;
 
 int hypervisor_request(void *         id,
@@ -56,8 +64,7 @@ int hypervisor_request(void *         id,
                        char *         buffer,
                        unsigned long  block_number,
                        unsigned short block_size,
-                       kdev_t         device,
-                       int            mode);
+                       kdev_t         device);
 
 
 /* ------------------------------------------------------------------------
@@ -160,29 +167,29 @@ static int xenolinux_block_revalidate(kdev_t dev)
  * block_number:  block to read
  * block_size:  size of each block
  * device:  ide/hda is 768 or 0x300
- * mode: XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC.  async requests
- *   will queue until a sync request is issued.
  */
 int hypervisor_request(void *         id,
                        int            operation,
                        char *         buffer,
                        unsigned long  block_number,
                        unsigned short block_size,
-                       kdev_t         device,
-                       int            mode)
+                       kdev_t         device)
 {
     int position;
-    void *buffer_pa, *buffer_ma; 
+    void *buffer_ma; 
     kdev_t phys_device = (kdev_t) 0;
     unsigned long sector_number = 0;
     struct gendisk *gd;     
 
-    /* Bail if there's no room in the request communication ring. */
-    if ( BLK_REQ_RING_INC(blk_ring->req_prod) == blk_ring->req_cons )
+    /*
+     * Bail if there's no room in the request communication ring. This may be 
+     * because we have a whole bunch of outstanding responses to process. No 
+     * matter, as the response handler will kick the request queue.
+     */
+    if ( BLK_RING_INC(blk_ring->req_prod) == resp_cons )
         return 1;
 
-    buffer_pa = (void *)virt_to_phys(buffer); 
-    buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); 
+    buffer_ma = (void *)phys_to_machine(virt_to_phys(buffer)); 
 
     switch ( operation )
     {
@@ -209,18 +216,15 @@ int hypervisor_request(void *         id,
 
     /* Fill out a communications ring structure & trap to the hypervisor */
     position = blk_ring->req_prod;
-    blk_ring->req_ring[position].id            = id;
-    blk_ring->req_ring[position].priority      = mode;
-    blk_ring->req_ring[position].operation     = operation;
-    blk_ring->req_ring[position].buffer        = buffer_ma;
-    blk_ring->req_ring[position].block_number  = block_number;
-    blk_ring->req_ring[position].block_size    = block_size;
-    blk_ring->req_ring[position].device        = phys_device;
-    blk_ring->req_ring[position].sector_number = sector_number;
+    blk_ring->ring[position].req.id            = id;
+    blk_ring->ring[position].req.operation     = operation;
+    blk_ring->ring[position].req.buffer        = buffer_ma;
+    blk_ring->ring[position].req.block_number  = block_number;
+    blk_ring->ring[position].req.block_size    = block_size;
+    blk_ring->ring[position].req.device        = phys_device;
+    blk_ring->ring[position].req.sector_number = sector_number;
 
-    blk_ring->req_prod = BLK_REQ_RING_INC(blk_ring->req_prod);
-
-    if ( mode == XEN_BLOCK_SYNC ) HYPERVISOR_block_io_op();
+    blk_ring->req_prod = BLK_RING_INC(position);
 
     return 0;
 }
@@ -258,8 +262,7 @@ static void do_xlblk_request (request_queue_t *rq)
        {
             full = hypervisor_request(
                 bh, (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
-                bh->b_data, bh->b_rsector, bh->b_size, 
-                bh->b_dev, XEN_BLOCK_ASYNC);
+                bh->b_data, bh->b_rsector, bh->b_size, bh->b_dev);
             
             if ( full ) goto out;
 
@@ -313,15 +316,15 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
     
     spin_lock_irqsave(&io_request_lock, flags);            
 
-    for ( i = blk_ring->resp_cons;
+    for ( i  = resp_cons;
          i != blk_ring->resp_prod;
-         i = BLK_RESP_RING_INC(i) )
+         i  = BLK_RING_INC(i) )
     {
-       blk_ring_resp_entry_t *bret = &blk_ring->resp_ring[i];
+       blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
         if ( (bh = bret->id) != NULL ) bh->b_end_io(bh, 1);
     }
     
-    blk_ring->resp_cons = i;
+    resp_cons = i;
 
     /* KAF: We can push work down at this point. We have the lock. */
     do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR));
@@ -336,9 +339,7 @@ int __init xlblk_init(void)
 
     /* This mapping was created early at boot time. */
     blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
-
-    blk_ring->req_prod = blk_ring->req_cons = 0;
-    blk_ring->resp_prod = blk_ring->resp_cons = 0;
+    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = 0;
     
     error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 0, 
                        "xlblk-response", NULL);
@@ -351,8 +352,9 @@ int __init xlblk_init(void)
     xen_disk_info.count = 0;
 
     if ( hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
-                            0, 0, (kdev_t) 0, XEN_BLOCK_SYNC) )
+                            0, 0, (kdev_t) 0) )
         BUG();
+    HYPERVISOR_block_io_op();
     while ( blk_ring->resp_prod != 1 ) barrier();
     for ( i = 0; i < xen_disk_info.count; i++ )
     { 
index 4d09a10409a24bd6b70be4a5edc993d78bd174c9..2ddef271e562bf655c4b6dd36464466974931648 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/delay.h>
 
 #include <asm/hypervisor-ifs/block.h>
 #include <asm/hypervisor-ifs/hypervisor-if.h>
@@ -127,7 +128,6 @@ int proc_write_bdt(struct file *file, const char *buffer,
   int  block_number = 0;
   int  block_size = 0;
   int  device = 0;
-  int  mode;
 
   if (copy_from_user(local, buffer, count))
   {
@@ -158,16 +158,6 @@ int proc_write_bdt(struct file *file, const char *buffer,
     return -EINVAL;
   }
 
-  if (opcode == 'r' || opcode == 'w' ||
-      opcode == 'd' || opcode == 'D')
-  {
-    mode = XEN_BLOCK_SYNC;
-  }
-  else /* (opcode == 'R' || opcode == 'W') */
-  {
-    mode = XEN_BLOCK_ASYNC;
-  }
-
   if (data)
   {
     kfree(data);
@@ -187,7 +177,9 @@ int proc_write_bdt(struct file *file, const char *buffer,
   /* submit request */
   hypervisor_request(0, meta.operation, meta.buffer, 
                     meta.block_number, meta.block_size,
-                    meta.device, mode);
+                    meta.device);
+  HYPERVISOR_block_io_op();
+  mdelay(1000); /* should wait for a proper acknowledgement/response. */
 
   kfree(local);
   return count;